# Replication package for 
# "The Economic Leverage of International Organizations in Interstate Disputes"
# Johannes Karreth
# June 30, 2017
# jkarreth@ursinus.edu

# This file: 21_crises_readdata.R
# Purpose: Prepare data for analysis of crises
# Execute <crises_createdata.R> to recreate crises_full.csv

# Start from an empty workspace: this removes every object currently in the
# global environment before anything is loaded.
rm(list = ls())
# setwd("...")

library("dplyr")
library("rio")
library("ggplot2")

# Source in functions
# (theme_jk() is used further down when the figure is built)
source("Functions/theme_jk.R")

# Crisis data; see the header for how crises_full.csv is recreated
dat <- import("crises_full.csv")

# Sum of the two igo_ingr*_use counts (2 and 3)
dat$igo_ingr23_use <- dat$igo_ingr2_use + dat$igo_ingr3_use

# Code serious clashes and wars as 1

# Helper: turn a 4-point violence scale into a 0/1 indicator.
#   x         numeric vector on the 1-4 scale
#   threshold lowest scale value that is coded 1
# Scale values at or above `threshold` become 1, lower scale values become 0.
# NA and any value outside 1-4 are passed through unchanged.
# Base R is used instead of car::recode(..., as.numeric.result = TRUE) so the
# script does not depend on that argument name.
# NOTE(review): newer car releases appear to have renamed it to `as.numeric`;
# confirm against the car NEWS if car::recode is ever reinstated.
violence_at_least <- function(x, threshold) {
  ifelse(x %in% 1:4, as.numeric(x >= threshold), x)
}

# Based on the VIOL variable

# majclash: values 3 and 4 -> 1; minclash: values 2, 3 and 4 -> 1; war: 4 -> 1.
# NOTE(review): minclash used to be recoded exactly like war (only 4 -> 1),
# which made the two indicators identical. It is now the "at least 2"
# indicator; confirm this matches the intended definition of a minor clash.
dat$viol.majclash <- violence_at_least(dat$viol, 3)
dat$viol.minclash <- violence_at_least(dat$viol, 2)
dat$viol.war <- violence_at_least(dat$viol, 4)

# Based on the SEVVIOSY variable (same thresholds as above)

dat$sev.majclash <- violence_at_least(dat$sevviosy, 3)
dat$sev.minclash <- violence_at_least(dat$sevviosy, 2)
dat$sev.war <- violence_at_least(dat$sevviosy, 4)

# Controls

# gravcr: code 7 is set to missing before the dummies are derived
dat$gravcr[dat$gravcr == 7] <- NA
# territory = 1 when gravcr is 3; highgrav = 1 when gravcr is 4, 5 or 6
dat$territory <- ifelse(dat$gravcr == 3, 1, 0)
dat$highgrav <- ifelse(dat$gravcr == 4 | dat$gravcr == 5 | dat$gravcr == 6, 1, 0)

# Log transforms (+1 inside the log for powdissy only)
dat$powdissy.ln <- log(dat$powdissy + 1)
dat$gdppc_low_ln <- log(dat$gdppc_low)
# Log of the absolute difference between cinc1 and cinc2
dat$cincdif <- log(abs(dat$cinc1 - dat$cinc2))

# Shared continent of the dyad, or "Different"; Oceania is folded into Asia
dat$continent <- ifelse(dat$continent1 == dat$continent2, dat$continent1, "Different")
dat$continent <- ifelse(dat$continent == "Oceania", "Asia", dat$continent)

# Put "Different" first (reference level), remaining continents in sorted
# order. The level order has to be set when the factor is built: assigning to
# levels() afterwards renames the existing (alphabetically sorted) levels
# position by position and would attach the wrong continent labels to the data.
dat$continentFactor <- factor(
  dat$continent,
  levels = union("Different", sort(unique(dat$continent)))
)

dat$igo_allothers <- dat$igo_ingr23_use - dat$igo_lev3_count_use + dat$igo_pb_use
  
# Only post-1945 data

dat <- dat[dat$year > 1945, ]

# Decade index: 0 for years before 1950, then one step up at each of
# 1950, 1960, 1970, 1980 and 1990. Everything from 1990 onward shares the
# value 5 (there is no separate category for 2000 and later).
# Each comparison contributes 1 once its threshold has been reached.
dat$decade <- as.numeric(
  (dat$year >= 1950) +
    (dat$year >= 1960) +
    (dat$year >= 1970) +
    (dat$year >= 1980) +
    (dat$year >= 1990)
)

# Factor flag: 1 for years before 1991, 0 otherwise
dat$coldWar <- factor(as.numeric(dat$year < 1991))

## Merge specific IGO data
igo_spec <- import("hl-igo_dyad.dta")

# Left join on dyad-year: every row of dat is kept, unmatched rows of
# igo_spec are discarded.
dat_spec <- merge(dat, igo_spec, by = c("dyadid", "year"), all.x = TRUE)

# High-leverage IGO count with individual organizations' membership
# indicators (io*_bin) subtracted; negative results are floored at 0.
# The suffixes follow the original naming: noWB uses io2400_bin, noIMF uses
# io2880_bin, noWBIMF uses both, noIFAD uses io2760_bin.
dat_spec <- dat_spec %>%
  mutate(
    hligo_noWB = igo_lev3_count_use - io2400_bin,
    hligo_noWB = ifelse(hligo_noWB < 0, 0, hligo_noWB),
    hligo_noIMF = igo_lev3_count_use - io2880_bin,
    hligo_noIMF = ifelse(hligo_noIMF < 0, 0, hligo_noIMF),
    hligo_noWBIMF = igo_lev3_count_use - io2880_bin - io2400_bin,
    hligo_noWBIMF = ifelse(hligo_noWBIMF < 0, 0, hligo_noWBIMF),
    hligo_noIFAD = igo_lev3_count_use - io2760_bin,
    hligo_noIFAD = ifelse(hligo_noIFAD < 0, 0, hligo_noIFAD)
  )

## Multiple crises per dyad per year?
## Here, reduce these to just one case

# Single numeric key combining dyad and year
dat$dyadid_year <- dat$dyadid * 10000 + dat$year

# Per dyad-year: highest viol.majclash, number of rows (obs), and a running
# row index within the dyad-year (count). The result stays grouped.
dat_nomult <- dat %>%
  group_by(dyadid_year) %>%
  mutate(
    viol.majclash_max = max(viol.majclash),
    obs = n(),
    count = seq_len(n())
  )

# Visual check of the first 100 rows
head(
  data.frame(dat_nomult)[, c("dyadid", "year", "viol.majclash", "viol.majclash_max", "obs", "count")],
  n = 100
)

# Flag rows to drop within dyad-years that have two or more rows:
#  (1) rows whose viol.majclash is below the dyad-year maximum;
#  (2) rows with viol.majclash == 0 other than the first row of the dyad-year.
# NOTE(review): when several rows of one dyad-year all have
# viol.majclash == 1, none of them is flagged, so such dyad-years keep more
# than one row -- confirm this is intended.
dat_nomult$drop <- with(
  dat_nomult,
  ifelse(obs >= 2 & viol.majclash < viol.majclash_max, 1, 0)
)
dat_nomult$drop <- with(
  dat_nomult,
  ifelse(obs >= 2 & viol.majclash == 0 & count > 1, 1, drop)
)

# Same visual check, now including the drop flag
head(
  data.frame(dat_nomult)[, c("dyadid", "year", "viol.majclash", "viol.majclash_max", "obs", "count", "drop")],
  n = 100
)

## Distribution of HLIGOs

# The model frame is used here only to obtain the rows that are complete on
# all variables of the model formula.
m1_eq <- viol.majclash ~ igo_lev3_count_use + highgrav + territory + jointpol7 +
  rivalry_th + absidealdiff + atopally + obs + count
m1_mf <- model.frame(m1_eq, data = dat_nomult)

# Sort by obs, then number the rows within each value of igo_lev3_count_use;
# ypos stacks the points vertically in the figure.
m1_mf_sum <- m1_mf %>%
  group_by(igo_lev3_count_use) %>%
  arrange(obs) %>%
  mutate(ypos = seq_len(n()))

# One point per crisis dyad, stacked over its high-leverage IGO count;
# point shape shows how many rows the dyad had in the same year.
p_crises_igo_hist <- ggplot(
  data = data.frame(m1_mf_sum),
  aes(x = as.factor(igo_lev3_count_use), y = ypos)
) +
  geom_point(aes(shape = as.factor(obs))) +
  xlab("Joint memberships in IGOs with high leverage") +
  ylab("Crisis dyads") +
  theme_jk() +
  scale_shape_manual(
    values = c(1, 2, 0),
    name = "Crisis onsets\nwithin dyad\nwithin same year"
  ) +
  theme(
    legend.position = c(0.7, 0.75),
    legend.title = element_text(size = 10)
  )

# Arguments are named in full instead of relying on partial matching
ggsave(
  filename = "Output_Tables-and-Figures/crises_igo_hist.pdf",
  plot = p_crises_igo_hist,
  width = 3.75,
  height = 6
)

# Keep only the rows that were not flagged for dropping
dat_nomult <- dat_nomult[dat_nomult$drop == 0, ]

# Use this data frame for analysis to produce crises_nomult_drop1_m1_fd.pdf

## Note that the Korean and Gulf wars cluster many crises
## Collapse these crises for analyses crises_coll

# Dummies for these crises:

# Name-based flags: 1 when crisname_dyad contains the given text
dat$Korea <- as.numeric(grepl("KOREAN", dat$crisname_dyad))
dat$Gulf <- as.numeric(grepl("GULF WAR", dat$crisname_dyad))

# Year-specific flags
dat$Korea1950 <- ifelse(dat$Korea == 1 & dat$year == 1950, 1, 0)
dat$Korea1953 <- ifelse(dat$Korea == 1 & dat$year == 1953, 1, 0)
dat$Korea1950s <- ifelse(dat$Korea1950 == 1 | dat$Korea1953 == 1, 1, 0)
dat$Gulf1990 <- ifelse(dat$Gulf == 1 & dat$year == 1990, 1, 0)

# Samples without the clustered crises
dat.noKOR <- dat[dat$Korea1950s == 0, ]
dat.noGUL <- dat[dat$Gulf1990 == 0, ]
# Element-wise `&` is required for a row mask. The scalar `&&` used before
# looked only at the first row (keeping all rows or none) on older R versions
# and is an error for vectors on current R.
dat.noKG <- dat[dat$Korea1950s == 0 & dat$Gulf1990 == 0, ]

# Alternatively, collapse them into one crisis with the US representing all Western countries:
# Use this for alternative models.

# 393    32    5.93    keep IRQ (645), KUW (690), USA (2)
# 133    31    5.74    keep USA (2), KON (731), CHN (710)
# 140    30    5.56    keep USA (2), KON (731), CHN (710)
# 132    17    3.15    keep USA (2), KON (731), CHN (710)

# Each *drop flag is built in two steps:
#  step 1: 0 for the dyads that are kept within the clustered crisis,
#          1 for every other row;
#  step 2: among the rows still at 1, keep the 1 only for rows that belong to
#          the listed crisis number(s); all remaining rows are reset to 0.

# Korea 1950: kept dyads 2731 and 2710; crisis numbers 132 and 133
dat$Korea1950drop <- ifelse(
  dat$Korea1950 == 1 & (dat$dyadid == 2731 | dat$dyadid == 2710),
  0, 1
)
dat$Korea1950drop[dat$Korea1950drop == 1] <- as.numeric(
  dat$crisno[dat$Korea1950drop == 1] == 132 |
    dat$crisno[dat$Korea1950drop == 1] == 133
)
table(dat$Korea1950drop) # should be (17 + 31) - 4 = 44

# Korea 1953: kept dyads 2731 and 2710; crisis number 140
dat$Korea1953drop <- ifelse(
  dat$Korea1953 == 1 & (dat$dyadid == 2731 | dat$dyadid == 2710),
  0, 1
)
dat$Korea1953drop[dat$Korea1953drop == 1] <- as.numeric(
  dat$crisno[dat$Korea1953drop == 1] == 140
)
table(dat$Korea1953drop) # should be 30 - 2 = 28

# Gulf 1990: kept dyads 2645 and 645690; crisis number 393
dat$Gulf1990drop <- ifelse(
  dat$Gulf1990 == 1 & (dat$dyadid == 2645 | dat$dyadid == 645690),
  0, 1
)
dat$Gulf1990drop[dat$Gulf1990drop == 1] <- as.numeric(
  dat$crisno[dat$Gulf1990drop == 1] == 393
)
table(dat$Gulf1990drop) # should be 32 - 2 = 30

# Sample with the clustered crises collapsed: rows not flagged by any of the
# three drop indicators
dat.noclus <- dat[dat$Korea1950drop == 0 & dat$Korea1953drop == 0 & dat$Gulf1990drop == 0, ]

# Attach the specific IGO data to the collapsed sample (left join on
# dyad-year: all rows of dat.noclus are kept).
dat.noclus_spec <- merge(dat.noclus, igo_spec, by = c("dyadid", "year"), all.x = TRUE)

# High-leverage IGO count with individual organizations' membership
# indicators (io*_bin) subtracted; negative results are floored at 0.
dat.noclus_spec <- dat.noclus_spec %>%
  mutate(
    hligo_noWB = igo_lev3_count_use - io2400_bin,
    hligo_noWB = ifelse(hligo_noWB < 0, 0, hligo_noWB),
    hligo_noIMF = igo_lev3_count_use - io2880_bin,
    hligo_noIMF = ifelse(hligo_noIMF < 0, 0, hligo_noIMF),
    hligo_noWBIMF = igo_lev3_count_use - io2880_bin - io2400_bin,
    hligo_noWBIMF = ifelse(hligo_noWBIMF < 0, 0, hligo_noWBIMF),
    hligo_noIFAD = igo_lev3_count_use - io2760_bin,
    hligo_noIFAD = ifelse(hligo_noIFAD < 0, 0, hligo_noIFAD)
  )